!*******************************************************************************
!>
!  Module for [[bvls]].
!
!### History
!  * Original code from http://www.netlib.org/lawson-hanson/all
!  * Jacob Williams, Nov 2018 : refactored into a module. some code cleanup.

    module bvls_module

    use slsqp_kinds,   only: wp
    use slsqp_support, only: zero,one,two

    implicit none

    private

    public :: bvls          ! main routine
    public :: bvls_wrapper  ! a wrapper with the same interface as nnls

    contains
!*******************************************************************************

!*******************************************************************************
!>
!  Call [[bvls]], but matching the interface of the old [[nnls]].

    subroutine bvls_wrapper(a,mda,m,n,b,x,rnorm,w,zz,mode,max_iter)

    implicit none

    integer,intent(in)                      :: mda
    integer,intent(in)                      :: n
    real(wp),dimension(mda,n),intent(inout) :: a
    integer,intent(in)                      :: m
    real(wp),dimension(m),intent(inout)     :: b
    real(wp),dimension(n),intent(out)       :: x
    real(wp),intent(out)                    :: rnorm
    real(wp),dimension(n),intent(inout)     :: w
    real(wp),dimension(m),intent(inout)     :: zz
    integer,intent(out)                     :: mode
    integer,intent(in)                      :: max_iter !! maximum number of iterations
                                                        !! (if <=0, then `3*n` is used)

    integer,dimension(n) :: index
    integer :: ierr
    integer :: nsetp
    real(wp),dimension(2,n) :: bnd  !! BND(1,J) is the lower bound for X(J).
                                    !! BND(2,J) is the upper bound for X(J).

    ! set bounds for nnls:
    bnd(1,:) = 0.0_wp
    bnd(2,:) = huge(0.0_wp)

    call bvls (a, b, bnd, x, rnorm, nsetp, w, index, ierr, max_iter)

    select case (ierr)
    case(0)
        mode = 1
    case(1:2)
        mode = 2
    case(3)
        ! doesn't exist in nnls... but will never happen because
        ! we are setting bounds above
        mode = -999
    case(4)
        mode = 3
    case default
        mode = -9999
        !error stop 'unknown output from bvls'
    end select

    end subroutine bvls_wrapper
!*******************************************************************************

!*******************************************************************************
!>
!  Given an m by n matrix, \(\mathbf{A}\), and an m-vector, \(\mathbf{b}\),
!  compute an n-vector, \(\mathbf{x}\), that solves the least squares problem:
!
!  \( \mathbf{A} \mathbf{x} = \mathbf{b}\)
!
!  subject to \( \mathbf{x} \) satisfying:
!
!  \( \mathbf{x}_l \le \mathbf{x} \le \mathbf{x}_u \)
!
!  This algorithm is a generalization of [[NNLS]], that solves
!  the least-squares problem, `A * X = B`, subject to all `X(J) >= 0`.
!
!### History
!  * The subroutine [[NNLS]] appeared in 'Solving least squares problems,'
!    by Lawson and Hanson, Prentice-Hall, 1974.  Work on BVLS was started
!    by C. L. Lawson and R. J. Hanson at Jet Propulsion Laboratory,
!    1973 June 12.  Many modifications were subsequently made.
!    This Fortran 90 code was completed in April, 1995 by R. J. Hanson.

    subroutine bvls (a, b, bnd, x, rnorm, nsetp, w, index, ierr, max_iter)

    implicit none

    real(wp),dimension(:,:),intent(inout) :: a  !! On entry A() contains the M by N matrix, A.
                                                !! On return A() contains the product matrix, Q*A, where
                                                !! Q is an M by M orthogonal matrix generated by this
                                                !! subroutine.  The dimensions are M=size(A,1) and N=size(A,2).
    real(wp),dimension(:),intent(inout)   :: b  !! On entry B() contains the M-vector, B.
                                                !! On return, B() contains Q*B.  The same Q multiplies A.
    real(wp),dimension(:,:),intent(in) :: bnd   !! BND(1,J) is the lower bound for X(J).
                                                !! BND(2,J) is the upper bound for X(J).
                                                !!
                                                !! Require:  BND(1,J)  <=  BND(2,J).
                                                !!
                                                !! The values BND(1,J) = -huge(ONE) and BND(2,J) = huge(ONE) are
                                                !! suggested choices to designate that there is no constraint in that
                                                !! direction.  The parameter ONE is 1.0 in the working precision.
    real(wp),dimension(:),intent(out)   :: x    !! On entry X() need not be initialized.  On return,
                                                !! X() will contain the solution N-vector.
    real(wp),intent(out)                :: rnorm !! Euclidean norm of the residual vector, b - A*X.
    integer,intent(out)                 :: nsetp !! Indicates the number of components of the solution
                                                 !! vector, X(), that are not at their constraint values.
    real(wp),dimension(:),intent(out)   :: w    !! An N-array.  On return, W() will contain the dual solution
                                                !! vector.   Using Set definitions below:
                                                !!
                                                !! * W(J) = 0 for all j in Set P,
                                                !! * W(J)  <=  0 for all j in Set Z, such that X(J) is at its
                                                !!   lower bound, and
                                                !! * W(J)  >=  0 for all j in Set Z, such that X(J) is at its
                                                !!   upper bound.
                                                !!
                                                !! If BND(1,J) = BND(2,J), so the variable X(J) is fixed,
                                                !! then W(J) will have an arbitrary value.
    integer,dimension(:),intent(out)    :: index    !! An INTEGER working array of size N.  On exit the contents
                                                    !! of this array define the sets P, Z, and F as follows:
                                                    !!
                                                    !! * INDEX(1)   through INDEX(NSETP)    =  Set P.
                                                    !! * INDEX(IZ1) through INDEX(IZ2)      = Set Z.
                                                    !! * INDEX(IZ2+1) through INDEX(N)      = Set F.
                                                    !!
                                                    !! IZ1 = NSETP + 1 = NPP1
                                                    !!
                                                    !! Any of these sets may be empty.  Set F is those components
                                                    !! that are constrained to a unique value by the given
                                                    !! constraints.   Sets P and Z are those that are allowed a non-
                                                    !! zero range of values.  Of these, set Z are those whose final
                                                    !! value is a constraint value, while set P are those whose
                                                    !! final value is not a constraint.  The value of IZ2 is not returned.
                                                    !!
                                                    !! It is computable as the number of bounds constraining a component
                                                    !! of X uniquely.
    integer,intent(out)                 :: ierr !! Indicates status on return:
                                                !!
                                                !! * 0 -- Solution completed.
                                                !! * 1 -- M  <=  0 or N  <=  0
                                                !! * 2 -- B(:), X(:), BND(:,:), W(:), or INDEX(:) size or shape violation.
                                                !! * 3 -- Input bounds are inconsistent.
                                                !! * 4 -- Exceed maximum number of iterations.
    integer,intent(in) :: max_iter !! maximum number of iterations (if <=0, then `3*n` is used)

    logical :: find, hitbnd, free1, free2, free
    integer :: iter     !! Iteration counter.
    integer :: itmax    !! Maximum number of iterations permitted.
                        !! Defaults to 3*n if `max_iter<=0`.
                        !! This is usually larger than required.
    integer :: m, n, i, ibound, ii, ip, iz, iz1, iz2, &
               j, jj, jz, l, lbound, npp1
    real(wp),dimension(size(a,2)) :: s
    real(wp),dimension(size(a,1)) :: z
    real(wp) :: alpha, asave, cc, range, &
                norm, sm, ss, t, unorm, up, ztest

    real(wp),parameter :: eps = epsilon(one)  !! Determines the relative linear dependence of a column vector
                                              !! for a variable moved from its initial value.  This is used in
                                              !! one place with the default value EPS=EPSILON(ONE).  Other
                                              !! values, larger or smaller may be needed for some problems.
                                              !! Library software will likely make this an optional argument.

    call initialize()
    ! The above call will set IERR.

    loopa: do

        ! Quit on error flag, or if all coefficients are already in the
        ! solution, .or. if M columns of A have been triangularized.
        if (ierr /=  0  .or.  iz1  > iz2 .or. nsetp >= m) exit loopa

        call select_another_coeff_to_solve_for()

        ! See if no index was found to be moved from set Z to set P.
        ! Then go to termination.
        if ( .not. find ) exit loopa

        call move_j_from_set_z_to_set_p()

        call test_set_p_against_constraints()

        ! The above call may set IERR.
        ! All coefficients in set P are strictly feasible.  Loop back.

    end do loopa

    call termination()

contains  ! These are internal subroutines.

subroutine initialize()

   m=size(a,1)
   n=size(a,2)
   if (m  <=  0 .or. n  <=  0) then
      ierr = 1
      return
   end if

   ! Check array sizes for consistency and with M and N.
   if (size(x) < n) then
      ierr=2
      return
   end if
   if (size(b) < m) then
      ierr=2
      return
   end if
   if (size(bnd,1) /= 2) then
      ierr=2
      return
   end if
   if (size(bnd,2) < n) then
      ierr=2
      return
   end if
   if (size(w) < n) then
      ierr=2
      return
   end if
   if (size(index) < n) then
      ierr=2
      return
   end if

   ierr = 0
   if (max_iter<=0) then
      itmax = 3*n
   else
      itmax = max_iter
   end if
   iter=0
   ! Initialize the array index().
   do i=1,n
      index(i)=i
   end do

   iz2=n
   iz1=1
   nsetp=0
   npp1=1

   ! Begin:  Loop on IZ to initialize  X().
   iz=iz1
   do
      if (iz  >  iz2 ) exit
      j=index(iz)
      if ( bnd(1,j)   <= -huge(one)) then
         if (bnd(2,j) >=  huge(one)) then
            x(j) = zero
         else
            x(j) = min(zero,bnd(2,j))
         end if
     else  if ( bnd(2,j) >= huge(one)) then
        x(j) = max(zero,bnd(1,j))
     else
        range = bnd(2,j) - bnd(1,j)
        if ( range <= zero ) then
           ! Here X(J) is constrained to a single value.
           index(iz)=index(iz2)
           index(iz2)=j
           iz=iz-1
           iz2=iz2-1
           x(j)=bnd(1,j)
           w(j)=zero
         else  if ( range  >  zero) then
            !! The following statement sets X(J) to 0 if the constraint interval
            !! includes 0, and otherwise sets X(J) to the endpoint of the
            !! constraint interval that is closest to 0.
            x(j) = max(bnd(1,j), min(bnd(2,j),zero))
         else
            ierr = 3
            return
         end if
   end if
      if ( abs(x(j)) > zero ) then
         ! Change B() to reflect a nonzero starting value for X(J).
         b(1:m)=b(1:m)-a(1:m,j)*x(j)
      end if
      iz=iz+1
   end do

end subroutine initialize

subroutine select_another_coeff_to_solve_for()

   !! 1. Search through set z for a new coefficient to solve for.
   !!    First select a candidate that is either an unconstrained
   !!    coefficient or else a constrained coefficient that has room
   !!    to move in the direction consistent with the sign of its dual
   !!    vector component.  Components of the dual (negative gradient)
   !!    vector will be computed as needed.
   !! 2. For each candidate start the transformation to bring this
   !!    candidate into the triangle, and then do two tests:  Test size
   !!    of new diagonal value to avoid extreme ill-conditioning, and
   !!    the value of this new coefficient to be sure it moved in the
   !!    expected direction.
   !! 3. If some coefficient passes all these conditions, set FIND = true,
   !!    The index of the selected coefficient is J = INDEX(IZ).
   !! 4. If no coefficient is selected, set FIND = false.

   find = .false.
   do iz=iz1,iz2
      j=index(iz)

      ! Set FREE1 = true if X(J) is not at the left end-point of its
      ! constraint region.
      ! Set FREE2 = true if X(J) is not at the right end-point of its
      ! constraint region.
      ! Set FREE = true if X(J) is not at either end-point of its
      ! constraint region.

     free1 = x(j) > bnd(1,j)
     free2 = x(j) < bnd(2,j)
     free = free1 .and. free2

     if ( free ) then
        call test_coef_j_for_diag_elt_and_direction_of_change()
     else
        ! Compute dual coefficient W(J).
        w(j)=dot_product(a(npp1:m,j),b(npp1:m))

        ! Can X(J) move in the direction indicated by the sign of W(J)?
        if ( w(j) < zero ) then
           if ( free1 ) call test_coef_j_for_diag_elt_and_direction_of_change()
        else  if ( w(j) > zero ) then
           if ( free2 ) call test_coef_j_for_diag_elt_and_direction_of_change()
        end if
     end if
     if ( find ) return
   end do

end subroutine select_another_coeff_to_solve_for

subroutine test_coef_j_for_diag_elt_and_direction_of_change()

   !! The sign of W(J) is OK for J to be moved to set P.
   !! Begin the transformation and check new diagonal element to avoid
   !! near linear dependence.

   asave=a(npp1,j)
   call htc (npp1, a(1:m,j), up)
   unorm = nrm2(a(1:nsetp,j))
   if ( abs(a(npp1,j)) > eps * unorm) then

      ! Column J is sufficiently independent.  Copy b into Z, update Z.
      z(1:m)=b(1:m)
      ! Compute product of transormation and updated right-hand side.
      norm=a(npp1,j)
      a(npp1,j)=up
      if (abs(norm) > zero) then
         sm=dot_product(a(npp1:m,j)/norm, z(npp1:m))/up
         z(npp1:m)=z(npp1:m)+sm*a(npp1:m,j)
         a(npp1,j)=norm
      end if

      if (abs(x(j)) >  zero) z(1:npp1)=z(1:npp1)+a(1:npp1,j)*x(j)
      ! Adjust Z() as though X(J) had been reset to zero.
      if ( free ) then
         find = .true.
      else
         !! Solve for ZTEST ( proposed new value for X(J) ).
         !! Then set FIND to indicate if ZTEST has moved away from X(J) in
         !! the expected direction indicated by the sign of W(J).
         ztest=z(npp1)/a(npp1,j)
         find = ( w(j) < zero  .and.  ztest < x(j) ) .or. &
                ( w(j) > zero  .and.  ztest > x(j) )
      end if
   end if

   ! If J was not accepted to be moved from set Z to set P,
   ! restore A(NNP1,J).  Failing these tests may mean the computed
   ! sign of W(J) is suspect, so here we set W(J) = 0.  This will
   ! not affect subsequent computation, but cleans up the W() array.
   if ( .not. find ) then
      a(npp1,j)=asave
      w(j)=zero
   end if

end subroutine test_coef_j_for_diag_elt_and_direction_of_change

subroutine move_j_from_set_z_to_set_p()

   !! The index  J=index(IZ)  has been selected to be moved from
   !! set Z to set P.  Z() contains the old B() adjusted as though X(J) = 0.
   !! A(*,J) contains the new Householder transformation vector.

   b(1:m)=z(1:m)
   index(iz)=index(iz1)
   index(iz1)=j
   iz1=iz1+1
   nsetp=npp1
   npp1=npp1+1
   ! The following loop can be null or not required.
   norm=a(nsetp,j)
   a(nsetp,j)=up
   if (abs(norm) > zero) then
      do jz=iz1,iz2
         jj=index(jz)
         sm=dot_product(a(nsetp:m,j)/norm, a(nsetp:m,jj))/up
         a(nsetp:m,jj)=a(nsetp:m,jj)+sm*a(nsetp:m,j)
      end do
   a(nsetp,j)=norm
   end if
   ! The following loop can be null.
   do l=npp1,m
      a(l,j)=zero
   end do!  L

   w(j)=zero

   ! Solve the triangular system.  Store this solution temporarily in Z().
   do i = nsetp, 1, -1
      if (i /= nsetp) z(1:i)=z(1:i)-a(1:i,ii)*z(i+1)
      ii=index(i)
      z(i)=z(i)/a(i,ii)
   end do

end subroutine move_j_from_set_z_to_set_p

subroutine test_set_p_against_constraints()

   loopb: do
      ! The solution obtained by solving the current set P is in the array Z().

      iter=iter+1
      if (iter > itmax) then
         ierr = 4
         exit loopb
      end if

      call see_if_all_constrained_coeffs_are_feasible()

      ! The above call sets HITBND.  If HITBND = true then it also sets
      ! ALPHA, JJ, and IBOUND.
      if ( .not. hitbnd ) exit loopb

      ! Here ALPHA will be between 0 and 1 for interpolation
      ! between the old X() and the new Z().
      do ip=1,nsetp
         l=index(ip)
         x(l)=x(l)+alpha*(z(ip)-x(l))
      end do

      i=index(jj)
      ! Note:  The exit test is done at the end of the loop, so the loop
      ! will always be executed at least once.
      do

         ! Modify A(*,*), B(*) and the index arrays to move coefficient I
         ! from set P to set Z.

         call move_coef_i_from_set_p_to_set_z

         if (nsetp <= 0) exit loopb

         ! See if the remaining coefficients in set P are feasible.  They should
         ! be because of the way ALPHA was determined.  If any are infeasible
         ! it is due to round-off error.  Any that are infeasible or on a boundary
         ! will be set to the boundary value and moved from set P to set Z.

          ibound = 0
          do jj=1,nsetp
             i=index(jj)
             if ( x(i) <= bnd(1,i)) then
                 ibound=1
                 exit
             else if ( x(i) >= bnd(2,i)) then
                 ibound=2
                 exit
             end if
         end do
         if (ibound <= 0) exit
      end do

      ! Copy B( ) into Z( ).  Then solve again and loop back.
      z(1:m)=b(1:m)

      do i = nsetp, 1, -1
         if (i /= nsetp) z(1:i)=z(1:i)-a(1:i,ii)*z(i+1)
         ii=index(i)
         z(i)=z(i)/a(i,ii)
      end do
   end do loopb

   ! The following loop can be null.
   do ip=1,nsetp
      i=index(ip)
      x(i)=z(ip)
   end do

end subroutine test_set_p_against_constraints

subroutine see_if_all_constrained_coeffs_are_feasible()

   !! See if each coefficient in set P is strictly interior to its constraint region.
   !! If so, set HITBND = false.
   !! If not, set HITBND = true, and also set ALPHA, JJ, and IBOUND.
   !! Then ALPHA will satisfy  0.  < ALPHA  <=  1.

   alpha=two
   do ip=1,nsetp
      l=index(ip)
      if (z(ip) <= bnd(1,l)) then
         ! Z(IP) HITS LOWER BOUND
         lbound=1
      else  if (z(ip) >= bnd(2,l)) then
         ! Z(IP) HITS UPPER BOUND
         lbound=2
      else
         lbound = 0
      end if

      if ( lbound /= 0 ) then
         t=(bnd(lbound,l)-x(l))/(z(ip)-x(l))
         if (alpha > t) then
           alpha=t
           jj=ip
           ibound=lbound
         end if
      end if
   end do
   hitbnd = abs(alpha - two) > zero

end subroutine see_if_all_constrained_coeffs_are_feasible

subroutine move_coef_i_from_set_p_to_set_z()

   x(i)=bnd(ibound,i)
   if (abs(x(i)) > zero .and. jj > 0) b(1:jj)=b(1:jj)-a(1:jj,i)*x(i)

   ! The following loop can be null.
   do j = jj+1, nsetp
      ii=index(j)
      index(j-1)=ii
      call rotg (a(j-1,ii),a(j,ii),cc,ss)
      sm=a(j-1,ii)
      ! The plane rotation is applied to two rows of A and the right-hand
      ! side.  One row is moved to the scratch array S and then the updates
      ! are computed.  The intent is for array operations to be performed
      ! and minimal extra data movement.  One extra rotation is applied
      ! to column II in this approach.
      s=a(j-1,1:n)
      a(j-1,1:n)=cc*s+ss*a(j,1:n)
      a(j,1:n)=cc*a(j,1:n)-ss*s
      a(j-1,ii)=sm
      a(j,ii)=zero
      sm=b(j-1)
      b(j-1)=cc*sm+ss*b(j)
      b(j)=cc*b(j)-ss*sm
   end do

   npp1=nsetp
   nsetp=nsetp-1
   iz1=iz1-1
   index(iz1)=i

end subroutine move_coef_i_from_set_p_to_set_z

subroutine termination()

   if (ierr <= 0) then
      ! Compute the norm of the residual vector.
      sm=zero
      if (npp1 <= m) then
         sm=nrm2(b(npp1:m))
      else
         w(1:n)=zero
      end if
      rnorm=sm
   end if

end subroutine termination

pure subroutine rotg(sa,sb,c,s)

   real(wp),intent(inout) :: sa
   real(wp),intent(in)    :: sb
   real(wp),intent(out)   :: c
   real(wp),intent(out)   :: s

   real(wp) :: roe,scale,r

   roe = sb
   if ( abs(sa) > abs(sb) ) roe = sa
   scale = abs(sa) + abs(sb)
   if ( scale <= zero ) then
      c = one
      s = zero
   else
      r = scale*sqrt((sa/scale)**2 + (sb/scale)**2)
      if (roe < zero) r=-r
      c = sa/r
      s = sb/r
      sa = r
   end if

end subroutine rotg

pure function nrm2 (x) result(norm)

   !!  NRM2 returns the Euclidean norm of a vector via the function
   !!  name, so that
   !!
   !!  `NRM2 := sqrt( x'*x )`
   !!
   !!### See also
   !!  * [[dnrm2]]

   real(wp),dimension(:),intent(in) :: x
   real(wp) :: norm

   real(wp) :: absxi, scale, ssq
   integer :: n, ix

   n=size(x)
   if ( n < 1) then
      norm  = zero
   else if ( n == 1 ) then
      norm  = abs( x( 1 ) )
   else
      scale = zero
      ssq   = one
      do ix = 1, n
         absxi = abs( x( ix ) )
          if (absxi > zero ) then
             if ( scale < absxi ) then
                ssq   = one + ssq*( scale/absxi )**2
                scale = absxi
             else
                ssq   = ssq + ( absxi/scale )**2
             end if
         end if
     end do
     norm  = scale * sqrt( ssq )
   end if

end function nrm2

pure subroutine htc (p, u, up)

   !! Construct a Householder transformation.

   integer,intent(in)  :: p
   real(wp),dimension(:),intent(inout) :: u
   real(wp),intent(out) :: up

   real(wp) :: vnorm

   vnorm=nrm2(u(p:size(u)))
   if (u(p) > zero) vnorm=-vnorm
   up=u(p)-vnorm
   u(p)=vnorm

end subroutine htc

end subroutine bvls

!*******************************************************************************
    end module bvls_module
!*******************************************************************************